In [1]:
# Inject a JS show/hide button for all code cells in the exported HTML report
# (relies on the jQuery bundled with classic notebook / nbconvert pages).
from IPython.core.display import HTML,Image
HTML('''<script> code_show=true;  function code_toggle() {  if (code_show){  $('div.input').hide();  } else {  $('div.input').show();  }  code_show = !code_show }  $( document ).ready(code_toggle); </script> <form action='javascript:code_toggle()'><input type='submit' value='Toggle Code'></form>''')
Out[1]:
In [2]:
import warnings
warnings.filterwarnings('ignore')
import gc, argparse, sys, os, errno
%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt;
import seaborn as sns
#sns.set()
#sns.set_style('whitegrid')
import h5py
from PIL import Image
import os
from tqdm import tqdm_notebook as tqdm
import scipy
import sklearn
from scipy.stats import pearsonr
from scipy.io import loadmat
import IPython.display as ipd
import IPython
import librosa.display
import librosa
from pystoi import stoi
Populating the interactive namespace from numpy and matplotlib
In [3]:
def plot_stft(audio,ax=None,n_fft=256,hop_length=128,show=False,n_mels=128,y_axis='mel'):
    """Compute (or draw) a dB-scaled spectrogram of a 1-D waveform.

    Parameters
    ----------
    audio : np.ndarray
        1-D waveform; assumed 16 kHz throughout this notebook.
    ax : matplotlib Axes, optional
        Target axes when ``show`` is True.
    n_fft, hop_length : int
        STFT window and hop sizes.
    show : bool
        If True, draw into ``ax`` and return None; otherwise return the
        spectrogram as a 2-D array in dB.
    n_mels : int
        Number of mel bands (only used when ``y_axis == 'mel'``).
    y_axis : str
        'mel' for a mel power spectrogram; anything else yields a plain
        magnitude STFT spectrogram.

    Returns
    -------
    np.ndarray or None
        dB spectrogram when ``show`` is False, otherwise None.
    """
    if y_axis == 'mel':
        S = librosa.feature.melspectrogram(audio, sr=16000, n_mels=n_mels,
                                           fmax=8000, n_fft=n_fft,
                                           hop_length=hop_length)
        S_db = librosa.power_to_db(S, ref=np.max)
        if show:
            librosa.display.specshow(S_db, y_axis='mel', cmap='gray_r',
                                     ax=ax, fmax=8000)
            return None
        return S_db
    # Linear-frequency path.  The STFT is only needed here, so it is no
    # longer computed unconditionally as in the original.
    X = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
    X_db = librosa.amplitude_to_db(np.abs(X))
    if show:
        # BUG FIX: the original called bare `specshow`, an undefined name
        # (only librosa.display.specshow is imported) -> NameError.
        librosa.display.specshow(X_db, cmap=plt.cm.Blues, sr=16000, ax=ax)
        return None
    return X_db
    
def MSE_pcc(A, B, ax=None):
    """Variance-normalised MSE and Pearson correlation between two arrays.

    Parameters
    ----------
    A, B : np.ndarray
        Same-shape arrays (prediction and ground truth).
    ax : unused
        Kept for signature compatibility with the plotting helpers.

    Returns
    -------
    (float, float)
        Mean squared error divided by B's variance, and Pearson r over
        the flattened arrays.
    """
    normalised_sq_err = np.square(A - B) / B.var()
    mse = normalised_sq_err.mean()
    pcc, _p_value = pearsonr(A.ravel(), B.ravel())
    return mse, pcc
def analyze(predict, GT_STFT_test_spkr, audio_pred, audio_gt, mode='test', seg_len=None):
    """Per-sample spectrogram MSE/PCC and waveform STOI, with histograms.

    Parameters
    ----------
    predict, GT_STFT_test_spkr : np.ndarray
        Predicted and ground-truth spectrograms, shape (samples, F, T).
    audio_pred, audio_gt : np.ndarray
        Flat waveforms; sample i occupies [i*seg_len, (i+1)*seg_len).
    mode : str
        Label prefix for the histogram titles.
    seg_len : int, optional
        Waveform samples per item.  Defaults to the notebook-level
        `interval` global (as the original hard-coded), so existing
        callers are unchanged.

    Returns
    -------
    (mse, pcc, stois)
        Three np.ndarray of length `samples`.
    """
    if seg_len is None:
        seg_len = interval  # backward compatible: fall back to the notebook global
    samples = predict.shape[0]
    mse = np.zeros(samples)
    pcc = np.zeros(samples)
    stois = np.zeros(samples)
    # Single pass: spectrogram error/correlation plus intelligibility (STOI)
    # for each word segment.
    for i in range(samples):
        mse[i], pcc[i] = MSE_pcc(predict[i], GT_STFT_test_spkr[i])
        stois[i] = stoi(audio_pred[i * seg_len:(i + 1) * seg_len],
                        audio_gt[i * seg_len:(i + 1) * seg_len],
                        16000, extended=False)
    # Summary histograms; titles show mean(std) rounded to 3 decimals.
    fig, ax = plt.subplots(1, 3, figsize=(18, 4))
    for axis, vals, label, color, bins in zip(
            ax, (mse, pcc, stois), ('MSE', 'PCC', 'STOI'),
            ('b', 'g', 'r'), (25, 50, 50)):
        axis.hist(vals, bins=bins, color=color)
        axis.set_title('%s %s: %g(%g)' % (mode, label,
                                          np.round(vals.mean(), 3),
                                          np.round(vals.std(), 3)))
    return mse, pcc, stois
def play(audio, sr=16000):
    """Render an inline audio player for a waveform.

    audio: tensor, e.g. ex['audio']; flattened first if it has 2+ dims.
    sr: sample rate in Hz.
    """
    flat = audio.ravel() if len(audio.shape) >= 2 else audio
    display(ipd.Audio(flat, rate=sr))

Audio-to-audio reconstruction results

In [4]:
interval = 16384  # waveform samples per word segment (~1.024 s at 16 kHz)
wave_gt = np.load('ny742_test_wave.npy').ravel()  # ground-truth test waveform, flattened
# predicted (denoised) waveform from the audio-to-audio model
wave_pred = librosa.load('sample_1.pngdenoisewave.wav',sr=16000)[0]
# interleave GT / prediction word-by-word for A/B listening (50 words)
wave_merge = np.concatenate(([np.concatenate((wave_gt[i*interval:(i+1)*interval],wave_pred[i*interval:(i+1)*interval]))\
                 for i in range(50)]))
select_word = np.loadtxt('wordlist.txt',dtype='str')  # word label for each segment

ground truth audio

In [5]:
display(ipd.Audio(wave_gt,rate=16000))  # listen: ground-truth audio

audio to audio reconstruction

In [6]:
display(ipd.Audio(wave_pred,rate=16000))  # listen: audio-to-audio reconstruction

corresponding word list

CLICK HERE to show/hide WORD LIST

1 hammer start from 0s

2 vase start from 1.024s

3 umbrella start from 2.048s

4 gun start from 3.072s

5 key start from 4.096s

6 cow start from 5.12s

7 couch start from 6.144s

8 glasses start from 7.168s

9 airplane start from 8.192s

10 leg start from 9.216s

11 bee start from 10.24s

12 cow start from 11.26s

13 table start from 12.29s

14 airplane start from 13.31s

15 fork start from 14.34s

16 hammer start from 15.36s

17 gun start from 16.38s

18 kite start from 17.41s

19 cat start from 18.43s

20 umbrella start from 19.46s

21 gloves start from 20.48s

22 knife start from 21.5s

23 house start from 22.53s

24 arrow start from 23.55s

25 sheep start from 24.58s

26 pencil start from 25.6s

27 watch start from 26.62s

28 kite start from 27.65s

29 violin start from 28.67s

30 box start from 29.7s

31 toe start from 30.72s

32 window start from 31.74s

33 mouse start from 32.77s

34 ball start from 33.79s

35 hat start from 34.82s

36 pencil start from 35.84s

37 gun start from 36.86s

38 cake start from 37.89s

39 knife start from 38.91s

40 crown start from 39.94s

41 window start from 40.96s

42 fork start from 41.98s

43 hat start from 43.01s

44 well start from 44.03s

45 envelope start from 45.06s

46 bed start from 46.08s

47 hammer start from 47.1s

48 gun start from 48.13s

49 ball start from 49.15s

50 flag start from 50.18s

merged audio

In [7]:
display(ipd.Audio(wave_merge,rate=16000))  # listen: GT / prediction interleaved per word
In [8]:
row_nums = 10
col_nums = 5
# Paired rows: even row = ground-truth waveform, odd row = prediction.
fig, ax = plt.subplots(row_nums * 2, col_nums, figsize=(col_nums * 2, row_nums * 1.5))
for i in range(row_nums):
    for j in range(col_nums):
        k = i * col_nums + j  # flat word index
        # Explicit bounds check replaces the original bare `except: pass`,
        # which silently swallowed any error while titling.
        if k < len(select_word):
            ax[i * 2, j].set_title(select_word[k])
        ax[i * 2, j].plot(wave_gt[k * interval:(k + 1) * interval])
        ax[i * 2 + 1, j].plot(wave_pred[k * interval:(k + 1) * interval])
        ax[i * 2, j].axis('off')
        ax[i * 2 + 1, j].axis('off')
fig.tight_layout()

spectrogram

In [9]:
# Per-word linear-frequency spectrograms: n_fft=511 -> 256 frequency bins,
# hop_length=129 -> 128 frames over each 16384-sample segment.
spec_pred = np.zeros([50,256,128])
spec_gt = np.zeros([50,256,128])


for i in tqdm(range(50)):
    spec_pred[i] = plot_stft(wave_pred[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None ,y_axis='linear',n_mels=64)
    spec_gt[i] = plot_stft(wave_gt[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None,y_axis='linear',n_mels=64 )

# Stack GT above prediction in one image per word; flip the frequency axis so
# low frequencies render at the bottom when shown with imshow.
spec_concat = np.concatenate(( np.flip(spec_gt,axis=1), np.flip(spec_pred,axis=1)),axis=1)
              #np.concatenate((np.swapaxes(spec_gt,2,1), np.swapaxes(spec_pred,2,1)),\
              #               axis=1)

In [10]:
row_nums = 10
col_nums = 5
fig, ax = plt.subplots(row_nums, col_nums, figsize=(col_nums * 2, row_nums * 5))
# Explicit namespace: the bare `cm` in the original came only from the
# %pylab star-import; plt.cm is the same module object.
cmap = plt.cm.gray_r
for i in range(row_nums):
    for j in range(col_nums):
        k = i * col_nums + j  # flat word index
        ax[i, j].imshow(spec_concat[k], cmap=cmap)
        # Bounds check replaces the original bare `except: pass`.
        if k < len(select_word):
            ax[i, j].set_title(select_word[k])
plt.tight_layout()

metrics: MSE, PCC, STOI

In [11]:
# Test-set metrics over all 50 words: spectrogram MSE/PCC + waveform STOI.
mse_test,pcc_test,stois_test = analyze(spec_pred,spec_gt,wave_pred,wave_gt)

ECoG to audio result (5 words)

In [12]:
interval = 16384  # waveform samples per word segment
wave_a2a = librosa.load('org.wav', sr=16000)[0]
# BUG FIX: the original computed `sample_num = len // interval * interval`
# and then sliced with `[:sample_num * interval]`, multiplying by `interval`
# twice.  numpy silently clamps the oversized slice, so the trailing partial
# segment was never actually trimmed.  Trim both waveforms to an exact
# multiple of `interval`.
trim_len = wave_a2a.shape[0] // interval * interval
wave_a2a = wave_a2a[:trim_len]
wave_pred = librosa.load('ecog.wav', sr=16000)[0][:trim_len]

select_word = np.loadtxt('wordlist.txt', dtype='str')
ecog_select_words = np.array(['house', 'sheep', 'watch', 'hat', 'envelope'])
# Indices of the selected words within the 50-word test list
# (explicit np.array: bare `array` relied on the %pylab star-import).
sample_ind = np.array([22, 24, 26, 34, 44])  # np.where(np.isin(select_word, ecog_select_words))[0]
wave_gt_select = np.concatenate([wave_gt[i * interval:(i + 1) * interval] for i in sample_ind])
# Interleave GT / prediction per word for A/B listening (5 words).
wave_merge = np.concatenate([np.concatenate((wave_gt_select[i * interval:(i + 1) * interval],
                                             wave_pred[i * interval:(i + 1) * interval]))
                             for i in range(5)])

ground-truth audio

In [13]:
display(ipd.Audio(wave_gt_select,rate=16000))  # listen: ground truth for the 5 selected words

audio-to-audio reconstruction

In [14]:
display(ipd.Audio(wave_a2a,rate=16000))  # listen: audio-to-audio reconstruction

ECoG to audio recon

In [15]:
display(ipd.Audio(wave_pred,rate=16000))  # listen: ECoG-to-audio reconstruction

corresponding word list

CLICK HERE to show/hide WORD LIST

1 hammer start from 0s

2 vase start from 1.024s

3 umbrella start from 2.048s

4 gun start from 3.072s

5 key start from 4.096s

merged audio

In [16]:
display(ipd.Audio(wave_merge,rate=16000))  # listen: GT / ECoG prediction interleaved per word
In [17]:
row_nums = 1
col_nums = 5
# Paired rows: top = ground-truth waveform, bottom = ECoG prediction.
fig, ax = plt.subplots(row_nums * 2, col_nums, figsize=(col_nums * 2, row_nums * 1.5))
for i in range(row_nums):
    for j in range(col_nums):
        k = i * col_nums + j  # flat word index
        # Bounds check replaces the original bare `except: pass`.
        if k < len(ecog_select_words):
            ax[i * 2, j].set_title(ecog_select_words[k])
        ax[i * 2, j].plot(wave_gt_select[k * interval:(k + 1) * interval])
        ax[i * 2 + 1, j].plot(wave_pred[k * interval:(k + 1) * interval])
        ax[i * 2, j].axis('off')
        ax[i * 2 + 1, j].axis('off')
fig.tight_layout()

spectrogram

In [18]:
# Per-word linear-frequency spectrograms for the 5 selected ECoG words
# (same n_fft/hop settings as the 50-word section: 256 bins x 128 frames).
spec_pred = np.zeros([5,256,128])
spec_gt = np.zeros([5,256,128])


for i in tqdm(range(5)):
    spec_pred[i] = plot_stft(wave_pred[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None ,y_axis='linear',n_mels=64)
    spec_gt[i] = plot_stft(wave_gt_select[i*interval:(i+1)*interval],n_fft=511,hop_length=129,ax=None,y_axis='linear',n_mels=64 )

# Stack GT above prediction per word; flip so low frequencies are at bottom.
spec_concat = np.concatenate(( np.flip(spec_gt,axis=1), np.flip(spec_pred,axis=1)),axis=1)
              #np.concatenate((np.swapaxes(spec_gt,2,1), np.swapaxes(spec_pred,2,1)),\
              #               axis=1)

In [19]:
row_nums = 1
col_nums = 5
# squeeze=False keeps `ax` 2-D even for a single row, so the original's
# try/except fallbacks between ax[i, j] and ax[j] indexing are unnecessary.
fig, ax = plt.subplots(row_nums, col_nums, squeeze=False,
                       figsize=(col_nums * 2, row_nums * 5))
# Explicit namespace: bare `cm` came only from the %pylab star-import.
cmap = plt.cm.gray_r
for i in range(row_nums):
    for j in range(col_nums):
        k = i * col_nums + j  # flat word index
        ax[i, j].imshow(spec_concat[k], cmap=cmap)
        # Bounds check replaces the original bare `except: pass`.
        if k < len(ecog_select_words):
            ax[i, j].set_title(ecog_select_words[k])
plt.tight_layout()

metrics: MSE, PCC, STOI

In [20]:
# Metrics for the ECoG-to-audio reconstruction.
# BUG FIX: the ground-truth waveform must be the 5 selected words
# (`wave_gt_select`) to match `spec_gt`; the original passed the full
# 50-word `wave_gt`, so STOI compared the prediction against the wrong words.
mse_test, pcc_test, stois_test = analyze(spec_pred, spec_gt, wave_pred, wave_gt_select)
# Publishing pipeline (run from a shell, not inside the notebook):
# render this notebook to HTML with a table of contents, without extracting images.
jupyter nbconvert --execute /Users/james/Desktop/videolab/ECoG/report/2020/formant_summary/1023/result.ipynb --to html_toc --ExtractOutputPreprocessor.enabled=False
# remove the previously published copy of the report
rm -rf /Users/james/Desktop/website/demo_web/blog/public/formant_summary
# copy the freshly rendered report into the site's public directory
rsync -avzh  /Users/james/Desktop/videolab/ECoG/report/2020/formant_summary /Users/james/Desktop/website/demo_web/blog/public/
cd /Users/james/Desktop/website/demo_web/blog
# `hgd` is presumably a user-defined alias/function for deploying the site -- TODO confirm
hgd